clear all
cd 'Path to main folder'

% ---- Feature selection size and run label ----
%FWords = importdata('functionwords.txt','\n');
nfeat=200;
% Echoed on purpose (no semicolon).
% NOTE(review): this variable shadows the built-in title() function for the
% rest of the session; rename it if any plotting code follows.
title=sprintf('%d Features',nfeat)

% ---- Feature lists ----
% Feature vectors; their indices match those of the values for the authors.
FWords = importdata('freqwords400.txt','\n');
FNFrames = importdata('Frames.txt','\n');

% Matches the chi-square critical value for 1 degree of freedom at p = 0.05.
% NOTE(review): not referenced anywhere in the visible part of this script.
Zlimit=3.84146;

% ---- Cross-validation parameters ----
classifierparameter=3;
CrossValK=0;                 % use 0 for leave-one-out validation
classifier=@multiclassSVM;   % choose the classification engine

% ---- Author sets (enable exactly one) ----
%authors={'Dostoyevski';'Tolstoy';'Turgenev';'Gogol'};
authors={'Garnett','Hogarth','Hapgood'};
%authors={'Dostoyevski2';'Tolstoy2';'Turgenev2';'Gogol'};%'Jay';'Hamilton and Madison';'Hamilton or Madison'};
%authors={'Hamilton','Madison','Jay'};%'Jay';'Hamilton and Madison';'Hamilton or Madison'};

% Passed straight through to crossval2 in the classification sections below.
useIG=true;

% Number of authors used for training; every author in the list is used.
trainauths=numel(authors);

% Build the frequent-word profile matrix, one block of rows per author, and
% at the same time the author label vector that is shared by every other
% profile matrix in this script.
    fw_profiles=[];
    auth_idx=[]; % auth_idx(k) is the position in 'authors' of the author who owns profile row k
    for idx=1:numel(authors)
        author_rows=readauthor(authors{idx},'fw',FWords);
        fw_profiles=[fw_profiles;author_rows];
        % one label per row that this author contributed
        auth_idx=[auth_idx;repmat(idx,size(author_rows,1),1)];
    end

    % Count 3-, 4- and 5-grams and build one profile matrix per n-gram length.
    %
    % The same procedure is run for every length, so it is written once and
    % looped over 'gram_lengths' instead of being copy-pasted three times.
    %
    % For each length a fresh java.util.HashMap is handed to readauthor along
    % with the length. readauthor returns a 2-element cell: {1} the profile
    % rows for that author, {2} the map after processing that author.
    % (Presumably the map assigns each n-gram a column index, so it can only
    % grow from author to author -- TODO confirm in readauthor.)
    % When the map holds more entries than the accumulated matrix has columns,
    % the matrix is right-padded with zero columns before the new rows are
    % appended, so rows collected earlier keep their column positions.
    gram_lengths=[3 4 5];
    gram_profiles=cell(size(gram_lengths));
    for gram_no=1:numel(gram_lengths)
        profile=[];
        ngrams=java.util.HashMap;   % vocabulary is not shared between lengths
        for idx=1:length(authors)
            tmp=readauthor(authors{idx},'ngram',{gram_lengths(gram_no),ngrams});
            tmpprofile=tmp{1};
            ngrams=tmp{2};
            % Explicit Java call: number of map entries, never MATLAB's
            % array size() of the object.
            diffsize=ngrams.size()-size(profile,2);
            if diffsize>0
                profile=[profile zeros(size(profile,1),diffsize)];
            end
            profile=[profile;tmpprofile];
        end
        gram_profiles{gram_no}=profile;
    end

    % Names used by the classification sections further down.
    prof_3gram=gram_profiles{1};
    prof_4gram=gram_profiles{2};
    prof_5gram=gram_profiles{3};
        
        
        
    % Build the frame-count profile matrix, stacking each author's rows in
    % the order the authors are listed.
    frame_profiles=[];
    for idx=1:numel(authors)
        frame_rows=readauthor(authors{idx},'frame',FNFrames);
        frame_profiles=[frame_profiles;frame_rows];
    end

% Class labels of the rows that belong to training authors.
groups=auth_idx(auth_idx<trainauths+1);


%***********************************************************%
%*           Frequent word based classification            *%
%***********************************************************%

% Select the rows of the training authors and normalise them.
is_train=auth_idx<trainauths+1;
training1=normalizeprofile(fw_profiles(is_train,:));

% Cross-validate, then report the results. The lines without a trailing
% semicolon echo their outputs to the command window on purpose.
[c_res1,binres1] = crossval2(training1,groups,CrossValK,classifier,classifierparameter,useIG,[],nfeat);
[pr1,ac1] = prec_recall(c_res1)
% mkNull turns the binary results and the labels into the two counts that
% are fed to the McNemar test.
[b,c]=mkNull(binres1,groups);
Z1=mcnemar(b,c)


%***********************************************************%
%*           ngram based classification            *%
%***********************************************************%

% Rows belonging to the training authors (same mask for every n-gram length).
is_train=auth_idx<trainauths+1;

% The three runs below are kept unrolled so that the echoed variable names
% (pr3g, ac3g, Z3g, ...) identify the n-gram length in the command window.
% The n-gram profiles are used as-is; normalizeprofile is not applied here.

% --- 3-grams ---
tng=prof_3gram(is_train,:);
[c_res3g,binres3g] = crossval2(tng,groups,CrossValK,classifier,classifierparameter,useIG,[],nfeat);
[pr3g,ac3g] = prec_recall(c_res3g)
[b,c]=mkNull(binres3g,groups);
Z3g=mcnemar(b,c)

% --- 4-grams ---
tng=prof_4gram(is_train,:);
[c_res4g,binres4g] = crossval2(tng,groups,CrossValK,classifier,classifierparameter,useIG,[],nfeat);
[pr4g,ac4g] = prec_recall(c_res4g)
[b,c]=mkNull(binres4g,groups);
Z4g=mcnemar(b,c)

% --- 5-grams ---
tng=prof_5gram(is_train,:);
[c_res5g,binres5g] = crossval2(tng,groups,CrossValK,classifier,classifierparameter,useIG,[],nfeat);
[pr5g,ac5g] = prec_recall(c_res5g)
[b,c]=mkNull(binres5g,groups);
Z5g=mcnemar(b,c)


%***********************************************************%
%*                Frame based classification               *%
%***********************************************************%



% Classification using frame counts: select the rows of the training
% authors and normalise them.
is_train=auth_idx<trainauths+1;
training2=normalizeprofile(frame_profiles(is_train,:));

% Cross-validate, then report the results (unterminated lines echo on purpose).
[c_res2,binres2] = crossval2(training2,groups,CrossValK,classifier,classifierparameter,useIG,[],nfeat);
[pr2,ac2] = prec_recall(c_res2)
[b,c]=mkNull(binres2,groups);
Z2=mcnemar(b,c)




%***********************************************************%
%*       Frame and frequent word based classification      *%
%***********************************************************%

% Classification using frames and frequent words together: the two
% normalised feature matrices from the previous sections are placed side by
% side, frame columns first.
training3=[training2,training1];

% Cross-validate on the combined features.
% NOTE(review): the 7th argument is left empty in every other call but is
% 400 here; its meaning is defined inside crossval2 -- confirm that 400 is
% still correct if the feature lists change size.
[c_res3,binres3] = crossval2(training3,groups,CrossValK,classifier,classifierparameter,useIG,400,nfeat);
[pr3,ac3] = prec_recall(c_res3)
[b,c]=mkNull(binres3,groups);
Z3=mcnemar(b,c)

% McNemar test of the combined run against the frequent-word-only run,
% using the counts of entries set in one binary result but not in the other.
only_combined=sum(binres3 & ~binres1);
only_freqword=sum(binres1 & ~binres3);
Z3_1=mcnemar(only_combined,only_freqword)
